# -*- coding: utf-8 -*-

import time
import re
import csv 
import os

# Directory holding the raw per-session listings; main() switches into it so
# the relative file names used below resolve.
DATA_DIR = os.path.expanduser("~/Dropbox/Research/termlimited/data/CA")

# 'w' starts a fresh output file (a header row is written); any other mode,
# e.g. 'a', skips the header.
write_or_append = 'w'
outputfilename = 'ca_bills.csv'

# Legislative sessions to process; each maps to 'CA_bills_<session>.txt'.
SESSIONS = ['1993_1994', '1995_1996', '1997_1998']

# Bill identifier at the start of a lower-cased line: letters, optional
# spaces, then any run of digits and spaces.
BILL_PATTERN = re.compile(r'^([a-z]+[ ]*[ 0-9]*)')


def parse_bill_line(line):
    """Split one line of a session listing into its bill fields.

    The line is lower-cased and parentheses are dropped.  The bill id is the
    leading run of letters followed by digits/spaces; the sponsor is the text
    after the bill id up to the first double space; the subject is whatever
    follows the sponsor up to the end of the line (whitespace is kept as-is).

    Args:
        line: A raw line of text, with or without its trailing newline.

    Returns:
        A ``(bill, sponsor, subject)`` tuple of strings, or ``None`` when the
        line does not start with a letter and therefore carries no bill id.
    """
    lowered = line.lower()
    match = BILL_PATTERN.search(lowered)
    if match is None:
        return None
    bill = match.group(1).strip()

    cleaned = lowered.replace('(', '').replace(')', '')
    # The bill id contains no parentheses, so it is still the prefix of
    # `cleaned`.  Only the first physical line is considered.
    remainder = cleaned[len(bill):].split('\n', 1)[0]

    sponsor = remainder.strip().split('  ')[0]
    # Slice by position instead of building a regex from the sponsor text:
    # sponsors may contain regex metacharacters, may be empty, or may also
    # occur inside the bill id, all of which broke a pattern-based lookup.
    subject = remainder.lstrip()[len(sponsor):]
    return bill, sponsor, subject


def main():
    """Convert every session listing into rows of the ';'-delimited CSV."""
    os.chdir(DATA_DIR)
    # The context manager guarantees the CSV is flushed and closed, even if
    # one of the input files is missing or unreadable.
    with open(outputfilename, write_or_append) as out_file:
        output = csv.writer(out_file, delimiter=';', dialect='excel')
        if write_or_append == 'w':
            output.writerow(['session', 'bill', 'sponsor', 'subject'])

        for session in SESSIONS:
            with open('CA_bills_' + session + '.txt') as f:
                for line in f:
                    parsed = parse_bill_line(line)
                    if parsed is None:
                        continue
                    bill, sponsor, subject = parsed
                    # Progress output; a single formatted string prints the
                    # same text under Python 2 and Python 3.
                    print('%s %s %s' % (session, bill, sponsor))
                    output.writerow([session, bill, sponsor, subject])


if __name__ == '__main__':
    main()


